For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.
setwd("../data")
if (!require("pacman")) install.packages("pacman")
pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires")
pacman::p_load("cowplot", "ggrepel", "ggspatial", "paletteer", "patchwork", "rgdal", "rnaturalearth", "sf", "tidyverse", "reshape2", "MCMC.OTU", "pairwiseAdonis", "RColorBrewer", "Redmonder", "flextable", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "Imap", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick")
options("scipen" = 10)
fknmsSites = read.csv("../data/stephanocoeniaMetaData.csv", header = TRUE)
fknmsSites$depth = factor(fknmsSites$depthZone, levels = levels(fknmsSites$depthZone)[c(2,1)])
fknmsSites$region = factor(fknmsSites$region, levels = levels(fknmsSites$region)[c(4, 1, 3, 2)])
fknmsSites$date = mdy(fknmsSites$date) %>% format("%d %b %Y")
fknmsPops = fknmsSites %>% group_by(region) %>% summarize(latDD = mean(latDD), longDD = mean(longDD), n = n()) %>% droplevels()
## `summarise()` ungrouping output (override with `.groups` argument)
fknmsSampleSites = fknmsSites %>% group_by(region, site, depthZone) %>% summarize(latDD = min(latDD), longDD = min(longDD))
## `summarise()` regrouping output by 'region', 'site' (override with `.groups` argument)
fknmsSampleSites$depthZone = factor(fknmsSampleSites$depthZone, levels = levels(fknmsSampleSites$depthZone)[c(2,1)])
fknmsBounds = read.csv("../data/fknmsSPA.csv", header = TRUE)
states = st_as_sf(ne_states(country = c("United States of America")), scale = "large")
countries = st_as_sf(ne_countries(country = c("Cuba", "Mexico", "The Bahamas"), scale = "large"))
florida = read_sf("../data/shp/flKeys.shp") %>% st_transform(crs = 4326)
bathy = read_sf("../data/shp/flBathy.shp") %>% st_transform(crs = 4326) %>% subset(subset = DATASET %in% c("fl_shelf", "fl_coast"))
tortugasBathy = read_sf("../data/shp/tortugasBathy.shp") %>% st_transform(crs = 4326)
Next we build a hi-res polygon of FL with the study site marked and a zoomed in map of the colony locations. We use ggspatial to add a north arrow and scale bar to the main map.
flPal = paletteer_d("vapoRwave::jazzCup")[c(2:5)]
boundPal = c("gray30", paletteer_d("vapoRwave::vapoRwave")[10])
floridaMap = ggplot() +
geom_polypath(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polypath(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_polypath(data = fknmsBounds, aes(x = long, y = lat, color = type, group = location), fill = NA) +
scale_fill_manual(values = flPal, name = "Region") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSites, aes(x = latDD, y = longDD, shape = depth), size = 0) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
geom_sf(data = states, fill = "white", size = 0.25) +
geom_sf(data = countries, fill = "white", size = 0.25) +
geom_segment(aes(x = -80.1, y = 25, xend = -76.58, yend = 26.455), size = 0.25) +
geom_segment(aes(x = -80.4, y = 25.3, xend = -79.94, yend = 29.77), size = 0.25) +
geom_segment(aes(x = -81.75, y = 24.4, xend = -87.01, yend = 26.17), size = 0.25) +
geom_segment(aes(x = -81.45, y = 24.7, xend = -83.64, yend = 29.48), size = 0.25) +
geom_segment(aes(x = -82.95, y = 24.45, xend = -85.81, yend = 22.13), size = 0.25) +
geom_segment(aes(x = -82.95, y = 24.75, xend = -85.81, yend = 25.45), size = 0.25) +
geom_point(data = fknmsPops, aes(x = longDD, y = latDD, fill = region), shape = 21, size = 2) +
geom_rect(aes(xmin = -80.4, xmax = -80.1, ymin = 25, ymax = 25.3), color = paletteer_d("vapoRwave::vapoRwave")[6], fill = NA, size = 0.4) +
geom_rect(aes(xmin = -81.75, xmax = -81.45, ymin = 24.4, ymax = 24.7), color = paletteer_d("vapoRwave::vapoRwave")[6], fill = NA, size = 0.4) +
geom_rect(aes(xmin = -83.25, xmax = -82.95, ymin = 24.45, ymax = 24.75), color = paletteer_d("vapoRwave::vapoRwave")[6], fill = NA, size = 0.4)+
coord_sf(xlim = c(-89, -77), ylim = c(22, 31)) +
scale_x_continuous(breaks = c(seq(-90, -76, by = 2))) +
scale_y_continuous(breaks = c(seq(22, 32, by = 2))) +
annotation_scale(location = "bl") +
annotation_north_arrow(location = "tr", which_north = "true", style = north_arrow_minimal()) +
guides(fill = guide_legend(override.aes = list(shape = 22, color = NA, size = 4), ncol = 2, order = 1), shape = guide_legend(override.aes = list(size = 3), order = 2), color = guide_legend(override.aes = list(fill = "black", alpha = 0.1), order = 3)) +
theme_bw() +
theme(panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
plot.background = element_blank(),
axis.title = element_blank(),
axis.ticks = element_line(color = "black"),
axis.text = element_text(color = "black"),
legend.position = "bottom",
legend.direction = "vertical",
legend.box = "horizontal")
inset = ggplot() +
geom_polypath(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polypath(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_segment(aes(x = -82.9645, xend = -82.4, y = 24.6, yend = 24.6), color = "gray92", size = .55) +
geom_sf(data = bathy, color = "gray75", size = 0.25) +
geom_polypath(data = fknmsBounds, aes(x = long, y = lat, color = type, group = location), fill = NA) +
scale_fill_manual(values = flPal, name = "Region") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSampleSites, aes(x = longDD, y = latDD, fill = region, shape = depthZone), size = 1.5) +
geom_sf(data = florida, fill = "white", size = 0.25) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
legend.text = element_text(size = 9),
legend.position = "none",
plot.background = element_blank())
upperKeys = inset +
annotation_scale(location = "br") +
coord_sf(xlim = c(-80.4, -80.1), ylim = c(25.0, 25.3)) +
scale_x_continuous(breaks = c(seq(-80.4, -80.0, by = .1))) +
scale_y_continuous(breaks = c(seq(25.0, 25.3, by = .1)))
lowerKeys = inset +
annotation_scale(location = "br") +
coord_sf(xlim = c(-81.75, -81.45), ylim = c(24.4, 24.7)) +
scale_x_continuous(breaks = c(seq(-81.7, -81.3, by = .1))) +
scale_y_continuous(breaks = c(seq(24.4, 24.7, by = .1)))
dryTortugas = ggplot() +
geom_polypath(data = fknmsBounds[fknmsBounds$type == "Sanctuary",], aes(x = long, y = lat, group = location), alpha = 0.1, fill = "black", color = NA) +
geom_polypath(data = fknmsBounds[fknmsBounds$location == "FKNMS2",], aes(x = long, y = lat), fill = "aliceblue", color = NA) +
geom_segment(aes(x = -82.9645, xend = -82.4, y = 24.6, yend = 24.6), color = "gray92", size = .55) +
geom_sf(data = tortugasBathy, color = "gray75", size = 0.25) +
geom_polypath(data = fknmsBounds, aes(x = long, y = lat, color = type), fill = NA) +
# scale_fill_paletteer_d("wesanderson::GrandBudapest1", name = "Region") +
scale_fill_manual(values = flPal, name = "Region") +
scale_color_manual(values = boundPal, name = "Boundaries", labels = c("FKNMS", "SPA")) +
geom_point(data = fknmsSites, aes(x = longDD, y = latDD, fill = region, shape = depth), size = 1.5) +
geom_sf(data = florida, fill = "white", size = 0.25) +
scale_shape_manual(values = c(24, 25), name = "Depth") +
annotation_scale(location = "br") +
coord_sf(xlim = c(-83.25, -82.95), ylim = c(24.45, 24.75)) +
scale_x_continuous(breaks = c(seq(-83.2, -82.9, by = .1))) +
scale_y_continuous(breaks = c(seq(24.4, 24.7, by = .1))) +
theme_bw() +
theme(legend.title = element_text(size = 9, face = "bold"),
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.background = element_rect(fill = "aliceblue"),
panel.border = element_rect(color = "black", size = 0.75, fill = NA),
legend.text = element_text(size = 9),
legend.position = "none",
plot.background = element_blank())
stephanoPic = image_read("../figures/stephano.png") %>%
image_border("black", "23x23")
fknmsMap = ggdraw() +
draw_plot(floridaMap) +
draw_plot(upperKeys, x = .71, y = 0.572, width = 0.29, height = 0.29) +
draw_plot(lowerKeys, x = 0.217, y = 0.55, width = 0.29, height = 0.29) +
draw_plot(dryTortugas, x = 0.065, y = 0.235, width = 0.29, height = 0.29) +
draw_image(stephanoPic, x = 0.735, y = 0.215, width = 0.24, height = 0.254)
ggsave("../figures/map.png", plot = fknmsMap, width = 16, height = 16, units = "cm", dpi = 300)
ggsave("../figures/figure1.tiff", plot = fknmsMap, width = 16, height = 16, units = "cm", dpi = 300)
levels(fknmsSites$collection) = c("SCUBA", "Tech Dive")
sampleData = fknmsSites %>% group_by(region, site)%>% summarize(latDD = first(latDD), longDD = first(longDD),depthZone = (first(depthZone)), depthRange = paste(min(depthM), "--", max(depthM), sep = ""), n = n(), date = as.character(min(date)), collection = as.character(first(collection)))%>% droplevels() %>% as.data.frame()
## `summarise()` regrouping output by 'region' (override with `.groups` argument)
sampleTab = sampleData
levels(sampleTab$collection)
## NULL
colnames(sampleTab) = c("Region", "Site", "Latitude", "Longitude", "Depth zone", "Sampling \ndepth (m)", "n", "Sampling date", "Sampling \nmethod")
sampleTab$Region
## [1] Upper Keys Upper Keys Upper Keys Upper Keys Upper Keys Lower Keys
## [7] Lower Keys Lower Keys Tortugas Bank Tortugas Bank Tortugas Bank Tortugas Bank
## [13] Riley's Hump Riley's Hump Riley's Hump Riley's Hump Riley's Hump
## Levels: Upper Keys Lower Keys Tortugas Bank Riley's Hump
finalTabRegion = c("Upper Keys", "", "", "", "", "Lower Keys", "", "", "Tortugas Bank", "", "", "", "Riley's Hump", "", "", "", "")
sampleTab$Region = finalTabRegion
sampleTab$Site = as.character(sampleTab$Site)
sampleTab$Site[1] = paste("Ian's Lumps")
sampleTabPub = sampleTab %>% flextable() %>%
flextable::compose(part = "header", j = "n", value = as_paragraph(as_i("n"))) %>%
font(fontname = "Times New Roman", part = "all") %>%
fontsize(size = 10, part = "all") %>%
bold(part = "header") %>%
align(align = "left", part = "all") %>%
autofit()
table1 = read_docx()
table1 = body_add_flextable(table1, value = sampleTabPub)
print(table1, target = "../tables/table1.docx")
sampleTabPub
Region | Site | Latitude | Longitude | Depth zone | Sampling | n | Sampling date | Sampling |
Upper Keys | Ian's Lumps | 25.15823 | -80.22064 | Mesophotic | 43.3--43.9 | 4 | 27 Aug 2019 | Tech Dive |
Site 1 | 25.21755 | -80.19341 | Mesophotic | 41.8--45.4 | 28 | 28 Aug 2019 | Tech Dive | |
Site 48 | 25.22057 | -80.20086 | Shallow | 24.7--28.3 | 15 | 28 Aug 2019 | SCUBA | |
Site 49 | 25.14405 | -80.25259 | Shallow | 18.3--20.4 | 7 | 27 Aug 2019 | SCUBA | |
Site 51 | 25.14416 | -80.25205 | Shallow | 19.8--23.8 | 10 | 27 Aug 2019 | SCUBA | |
Lower Keys | Site 45 | 24.49396 | -81.58831 | Mesophotic | 31.4--35.1 | 30 | 26 Aug 2019 | Tech Dive |
Site 46 | 24.49297 | -81.59893 | Shallow | 17.1--18.6 | 26 | 26 Aug 2019 | SCUBA | |
Site 47 | 24.49105 | -81.60603 | Shallow | 18--19.2 | 4 | 26 Aug 2019 | SCUBA | |
Tortugas Bank | Site 17 | 24.65211 | -83.10309 | Mesophotic | 33.5--35.1 | 9 | 22 Aug 2019 | Tech Dive |
Site 33 | 24.66020 | -83.07890 | Shallow | 14.6--14.6 | 1 | 22 Aug 2019 | SCUBA | |
Site 35/36 | 24.64042 | -83.10294 | Mesophotic | 28.7--31.1 | 21 | 23 Aug 2019 | Tech Dive | |
Site 37 | 24.67519 | -83.06858 | Shallow | 16.8--21 | 24 | 23 Aug 2019 | SCUBA | |
Riley's Hump | Site 18 | 24.49396 | -83.09608 | Mesophotic | 31.4--34.1 | 5 | 24 Aug 2019 | Tech Dive |
Site 19 | 24.48926 | -83.12269 | Mesophotic | 37.5--37.5 | 1 | 24 Aug 2019 | Tech Dive | |
Site 39 | 24.51136 | -83.09516 | Shallow | 25.9--28 | 12 | 24 Aug 2019 | SCUBA | |
Site 40 | 24.49239 | -83.12151 | Shallow | 25.3--26.5 | 20 | 25 Aug 2019 | SCUBA | |
Site 41 | 24.48796 | -83.11390 | Mesophotic | 30.8--35.7 | 9 | 25 Aug 2019 | Tech Dive |
Analyzing 2bRAD generated SNPs (XXX loci) for population structure//genetic differentiation across sites and depth zones in FKNMS
Identification of any natural clones using technical replicates as a baseline for clonality between samples.
cloneBams = read.csv("../data/stephanocoeniaMetaData.csv") # list of bam files
cloneMa = as.matrix(read.table("../data/snps/clones/sintClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams[,1],cloneBams[,1])
clonesHc = hclust(as.dist(cloneMa),"ave")
clonePops = cloneBams$region
cloneDepth = cloneBams$depthZone
cloneDend = cloneMa %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$pop = clonePops[order.dendrogram(cloneDend)]
cloneDendPoints$depth=cloneDepth[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
techReps = c("S066.1", "S066.2", "S066.3", "S162.1", "S162.2", "S162.3", "S205.1", "S205.2", "S205.3")
cloneDendPoints$depth = factor(cloneDendPoints$depth,levels(cloneDendPoints$depth)[c(2,1)])
cloneDendPoints$pop = factor(cloneDendPoints$pop,levels(cloneDendPoints$pop)[c(4, 1, 3, 2)])
flPal = paletteer_d("vapoRwave::jazzCup")[c(2:5)]
cloneDendA = ggplot() +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), size = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
#scale_fill_brewer(palette = "Dark2", name = "Population") +
scale_fill_manual(values = flPal, name= "Population")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
geom_hline(yintercept = 0.12, color = "red", lty = 5, size = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
# cloneDend
ggsave("../figures/cloneDend.png", plot = cloneDend, height = 8, width = 35, units = "in", dpi = 300)
ggsave("../figures/cloneDend.eps", plot = cloneDend, height = 8, width = 35, units = "in", dpi = 300)
We removed the technical replicates/clones and re-ran ANGSD for all the following pop-gen analyses.
bams = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] # list of bams files and their populationsbams = bams[-c(66,68,164,166,209,211),]
ma = as.matrix(read.table("../data/snps/sintNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(bams[,1],bams[,1])
hc = hclust(as.dist(ma),"ave")
pops = bams$region
depth = bams$depthZone
dend = ma %>% as.dist() %>% hclust(.,"ave") %>% as.dendrogram()
dData = dend %>% dendro_data()
dendPoints = dData$labels
dendPoints$pop = pops[order.dendrogram(dend)]
dendPoints$depth=depth[order.dendrogram(dend)]
rownames(dendPoints) = dendPoints$label
dendPoints$depth = factor(dendPoints$depth,levels(dendPoints$depth)[c(2,1)])
dendPoints$pop = factor(dendPoints$pop,levels(dendPoints$pop)[c(4, 1, 3, 2)])
flPal = paletteer_d("vapoRwave::jazzCup")[c(2:5)]
dendA = ggplot() +
geom_segment(data = segment(dData), aes(x = x, y = y, xend = xend, yend = yend), size = 0.5) +
geom_point(data = dendPoints, aes(x = x, y = y, fill = pop, shape = depth), size = 4, stroke = 0.25) +
#scale_fill_brewer(palette = "Dark2", name = "Population") +
scale_fill_manual(values = flPal, name= "Region")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
# spacing technical replicates further from leaf
# geom_text(data = dendPoints, aes(x = x, y = (y - .025), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
dend = dendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 24, color = "black", angle = 90),
axis.text.y = element_text(size = 20, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 24),
legend.text = element_text(size = 20),
legend.position = "bottom")
# dend
ggsave("../figures/dend.png", plot = dend, height = 6, width = 37, units = "in", dpi = 300)
ggsave("../figures/dend.eps", plot = dend, height = 6, width = 37, units = "in", dpi = 300)
sintVcf = read.vcfR("../data/snps/sintNoClones.bcf", verbose = FALSE)
sintGenlightPopDepth = vcfR2genlight(sintVcf, n.cores = 2) # Converts the vcf file into a file format that poppr uses the "genlight" format
locNames(sintGenlightPopDepth) = paste(sintVcf@fix[,1],sintVcf@fix[,2],sep="_")
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% select("sample" = tubeID, "pop" = region, "depth" = depthZone) # Reads in population data for each sample
popData$popdepth = paste(popData$pop, popData$depth, sep = " ")
strata(sintGenlightPopDepth) = data.frame(popData)
setPop(sintGenlightPopDepth) = ~popdepth
amova <- poppr.amova(sintGenlightPopDepth, ~popdepth) #Runs AMOVA
amova
## $call
## ade4::amova(samples = xtab, distances = xdist, structures = xstruct)
##
## $results
## Df Sum Sq Mean Sq
## Between popdepth 7 35867.77 5123.967
## Between samples Within popdepth 212 680293.73 3208.933
## Within samples 220 562159.50 2555.270
## Total 439 1278321.00 2911.893
##
## $componentsofcovariance
## Sigma %
## Variations Between popdepth 34.98402 1.19928
## Variations Between samples Within popdepth 326.83112 11.20403
## Variations Within samples 2555.27045 87.59669
## Total variations 2917.08559 100.00000
##
## $statphi
## Phi
## Phi-samples-total 0.1240331
## Phi-samples-popdepth 0.1134003
## Phi-popdepth-total 0.0119928
set.seed(694)
amovasignif <- randtest(amova, nrepet = 99) #Calculates significance levels of the AMOVA with 99 permutations
amovasignif$names
## [1] "Variations within samples" "Variations between samples"
## [3] "Variations between popdepth"
amovasignif$obs
## [1] 2555.27045 326.83112 34.98402
amovasignif$pvalue
## [1] 0.01 0.01 0.01
amovaPerc = paste(round(amova$componentsofcovariance$`%`[1], 2), "%",sep="")
amovaP = amovasignif$pvalue[3]
sintMa = as.matrix(read.table("../data/snps/sintNoClones.ibsMat"))
sintMds = cmdscale(sintMa, eig = TRUE, x.ret = TRUE)
# Determine percent variation captured on each axis
# Calculate the eigenvalues so later we can figure out % variation shown on each Principal Coordinate
sintPcoaVar = round(sintMds$eig/sum(sintMds$eig)*100, 1)
head(sintPcoaVar)
## [1] 9.4 4.1 3.3 0.7 0.7 0.6
# Format data to plot
sintPcoaValues = sintMds$points
head(sintPcoaValues)
## [,1] [,2]
## [1,] 0.02401828 0.02621462
## [2,] 0.02802245 -0.06975987
## [3,] 0.02920866 -0.07022738
## [4,] 0.02727511 0.02184773
## [5,] 0.02735482 0.02309735
## [6,] 0.03013232 0.02006783
sintI2P = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% select("sample" = tubeID, "pop" = region, "depth" = depthZone) # 2-column tab-delimited table of individual assignments to populations; must be in the same order as samples in the bam list or vcf file.
row.names(sintI2P) = sintI2P[,1]
sintI2P$popdepth = paste(sintI2P$pop, sintI2P$depth, sep = " ")
sintPcoaValues=cbind(sintI2P, sintPcoaValues)
sintPcoaValues =as.data.frame(sintPcoaValues, sample = rownames(sintPcoaValues))
colnames(sintPcoaValues)[c(5,6)] = c("PCo1", "PCo2")
head(sintPcoaValues)
## sample pop depth popdepth PCo1 PCo2
## SFK001 SFK001 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.02401828 0.02621462
## SFK002 SFK002 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.02802245 -0.06975987
## SFK003 SFK003 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.02920866 -0.07022738
## SFK004 SFK004 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.02727511 0.02184773
## SFK005 SFK005 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.02735482 0.02309735
## SFK006 SFK006 Tortugas Bank Mesophotic Tortugas Bank Mesophotic 0.03013232 0.02006783
sintPCoA = merge(sintPcoaValues, aggregate(cbind(mean.x=PCo1,mean.y=PCo2)~popdepth, sintPcoaValues, mean), by="popdepth")
sintPCoA$depth = factor(sintPCoA$depth, levels(sintPCoA$depth)[c(2,1)])
sintPCoA$pop = factor(sintPCoA$pop, levels(sintPCoA$pop)[c(4, 1, 3, 2)])
# SNP PCoA biplot
flPal = paletteer_d("vapoRwave::jazzCup")[c(2:5)]
sintPcoaPlotA = ggplot(sintPCoA, aes(x = PCo1, y = PCo2, color = pop, fill = pop, shape = depth, linetype = depth)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
# stat_ellipse(data = sintPCoA, type = "t", geom = "polygon", alpha = 0.1) + #ellipse
scale_linetype_manual(values = c(1,2), name = "Depth Zone")+
geom_point(aes(x = PCo1, y = PCo2, shape = depth), size = 3, alpha = 0.3, show.legend = FALSE) + #individual's points indicated by circles
scale_shape_manual(values = c(24,25), name = "Depth Zone") +
geom_point(aes(x = mean.x, y = mean.y, shape = depth), size = 5, color = "black") + #population centroids indicated by triangles
stat_ellipse(data = sintPCoA, type = "t", geom = "polygon", alpha = 0.1, fill = NA) +
annotate(geom = "text", x = 0.1, y = -0.15, label = bquote("AMOVA:"~.(amovaPerc)*","~italic(p)~" = "~.(amovaP))) +
scale_fill_manual(values = flPal, name = "Region") +
scale_color_manual(values = flPal, guide = NULL) +
xlab(paste ("PCo 1 (", sintPcoaVar[1],"%)", sep = "")) + #Prints percent variation explained by first axis
ylab(paste ("PCo 2 (", sintPcoaVar[2],"%)", sep = "")) + #Prints percent variation explained by second axis
guides(shape = guide_legend(order = 2), linetype = guide_legend(override.aes = list(linetype = c(1,2), alpha = 1, color = "black", fill = NA), order = 3), fill = guide_legend(override.aes = list(shape = 22, size = 5, color = NA, alpha = NA), order = 1))+
theme_bw()
sintPcoaPlot = sintPcoaPlotA +
theme(axis.title.x = element_text(color = "black", size = 10),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 10),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "right",
panel.border = element_rect(color = "black", size = 1),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
sintPcoaPlot
ggsave("../figures/pcoaPlot.png", plot = sintPcoaPlot, height = 3.5, width = 7, units = "in", dpi = 300)
ggsave("../figures/pcoaPlot.tiff", plot = sintPcoaPlot, height = 3.5, width = 7, units = "in", dpi = 300)
sintVcf = read.vcfR("../data/snps/sintNoClones.bcf", verbose = FALSE)
sintGenlightPopDepth = vcfR2genlight(sintVcf, n.cores = 2) # Converts the vcf file into a file format that poppr uses the "genlight" format
locNames(sintGenlightPopDepth) = paste(sintVcf@fix[,1],sintVcf@fix[,2],sep="_")
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% select("sample" = tubeID, "pop" = region, "depth" = depthZone) # Reads in population data for each sample
popData$popdepth = paste(popData$pop, popData$depth, sep = " ")
strata(sintGenlightPopDepth) = data.frame(popData)
setPop(sintGenlightPopDepth) = ~popdepth
sintGenlightPopDepth$pop = factor(sintGenlightPopDepth$pop,
levels(sintGenlightPopDepth$pop)[c(7, 8, 6, 5, 2, 1, 4, 3)])
levels(sintGenlightPopDepth$pop) = c("Upper Keys \nShallow", "Upper Keys \nMesophotic", "Lower Keys \nShallow", "Lower Keys \nMesophotic", "Tortugas Bank \nShallow", "Tortugas Bank \nMesophotic", "Riley's Hump \nShallow", "Riley's Hump \nMesophotic")
set.seed(694)
fk.fst <- stamppFst(sintGenlightPopDepth, nboots = 99, percent = 95, nclusters = 2) #99 permutations
fk.fst$Fsts
## Tortugas Bank \nMesophotic Tortugas Bank \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow 0.021928230 NA
## Riley's Hump \nMesophotic 0.002397382 0.012550165
## Riley's Hump \nShallow 0.010785004 0.006637748
## Lower Keys \nMesophotic 0.002930507 0.029957697
## Lower Keys \nShallow 0.021249108 0.005407182
## Upper Keys \nShallow 0.019104757 0.014108148
## Upper Keys \nMesophotic 0.001165877 0.027396216
## Riley's Hump \nMesophotic Riley's Hump \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow 0.0003676029 NA
## Lower Keys \nMesophotic 0.0078265244 0.013753336
## Lower Keys \nShallow 0.0061114733 -0.002212034
## Upper Keys \nShallow 0.0049598707 -0.001557742
## Upper Keys \nMesophotic 0.0052272451 0.012581289
## Lower Keys \nMesophotic Lower Keys \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow NA NA
## Lower Keys \nMesophotic NA NA
## Lower Keys \nShallow 0.0255855830 NA
## Upper Keys \nShallow 0.0211184139 -0.001759316
## Upper Keys \nMesophotic -0.0002008376 0.024025484
## Upper Keys \nShallow Upper Keys \nMesophotic
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow NA NA
## Lower Keys \nMesophotic NA NA
## Lower Keys \nShallow NA NA
## Upper Keys \nShallow NA NA
## Upper Keys \nMesophotic 0.019923 NA
fk.fst$Pvalues
## Tortugas Bank \nMesophotic Tortugas Bank \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow 0 NA
## Riley's Hump \nMesophotic 0 0
## Riley's Hump \nShallow 0 0
## Lower Keys \nMesophotic 0 0
## Lower Keys \nShallow 0 0
## Upper Keys \nShallow 0 0
## Upper Keys \nMesophotic 0 0
## Riley's Hump \nMesophotic Riley's Hump \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow 0.1313131 NA
## Lower Keys \nMesophotic 0.0000000 0
## Lower Keys \nShallow 0.0000000 1
## Upper Keys \nShallow 0.0000000 1
## Upper Keys \nMesophotic 0.0000000 0
## Lower Keys \nMesophotic Lower Keys \nShallow
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow NA NA
## Lower Keys \nMesophotic NA NA
## Lower Keys \nShallow 0.0000000 NA
## Upper Keys \nShallow 0.0000000 1
## Upper Keys \nMesophotic 0.8989899 0
## Upper Keys \nShallow Upper Keys \nMesophotic
## Tortugas Bank \nMesophotic NA NA
## Tortugas Bank \nShallow NA NA
## Riley's Hump \nMesophotic NA NA
## Riley's Hump \nShallow NA NA
## Lower Keys \nMesophotic NA NA
## Lower Keys \nShallow NA NA
## Upper Keys \nShallow NA NA
## Upper Keys \nMesophotic 0 NA
pop.order = c("Riley's Hump \nMesophotic", "Riley's Hump \nShallow", "Tortugas Bank \nMesophotic", "Tortugas Bank \nShallow", "Lower Keys \nMesophotic", "Lower Keys \nShallow", "Upper Keys \nMesophotic", "Upper Keys \nShallow")
# reads in fst matrix
snpFstMa <- as.matrix(fk.fst$Fsts)
upperTriangle(snpFstMa, byrow = TRUE) <- lowerTriangle(snpFstMa)
snpFstMa <- snpFstMa[,pop.order] %>%
.[pop.order,]
snpFstMa[upper.tri(snpFstMa)] <- NA
snpFstMa <- as.data.frame(snpFstMa)
snpFstMa$Pop = factor(row.names(snpFstMa), levels = unique(pop.order))
snpQMa <- as.matrix(fk.fst$Pvalues)
upperTriangle(snpQMa, byrow=TRUE) <- lowerTriangle(snpQMa)
snpQMa <- snpQMa[,pop.order] %>%
.[pop.order,]
snpQMa[upper.tri(snpQMa)] <- NA
snpQMa <- as.data.frame(snpQMa)
snpQMa$Pop = factor(row.names(snpQMa), levels = unique(pop.order))
snpFstMa$Pop = factor(row.names(snpFstMa), levels = unique(pop.order))
snpFst = melt(snpFstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
snpFst = snpFst[snpFst$Pop != snpFst$Pop2,]
snpFst$Fst = round(snpFst$Fst, 3)
snpFst = snpFst %>% mutate(Fst = replace(Fst, Fst < 0, 0))
snpQ = melt(snpQMa, id.vars = "Pop", value.name = "Pval", variable.name = "Pop2", na.rm = FALSE)
snpQ = snpQ[snpQ$Pop != snpQ$Pop2,]
snpQ$Qval = p.adjust(snpQ$Pval, method = "BH")
snpHeatmapA = ggplot(data = snpFst, aes(Pop, Pop2, fill = Fst))+
geom_tile(color = "white")+
scale_fill_gradient(low = "white", high = "#BF5E49",limit = c(0, 0.03),
space = "Lab", name = expression(paste(italic("F")[ST])), na.value = "white")+
geom_text(data = snpFst, aes(Pop, Pop2, label = Fst), color = ifelse(snpQ$Qval <= 0.05,"black", "darkgrey"), size = ifelse(snpQ$Qval < 0.05, 6, 5), fontface = ifelse (snpQ$Qval < 0.05, "bold", "plain")) +
guides(fill=guide_colorbar(barwidth = 12, barheight = 1, title.position = "top", title.hjust = 0.5))+
scale_y_discrete(position = "left")+
scale_x_discrete(limits = rev(levels(snpFst$Pop))[c(1:8)]) +
theme_minimal()
snpHeatmap = snpHeatmapA + theme(
axis.text.x = element_text(vjust = 1, size = 16, hjust = 0.5, color = "black"),
axis.text.y = element_text(size = 16, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.position = c(0.5, 0.9),
legend.direction = "horizontal",
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
plot.title = element_text(size = 16)
)
# snpHeatmap
ggsave("../figures/fstHeatMap.png", plot = snpHeatmap, width = 36, height = 13, units = "cm", dpi = 300)
ggsave("../figures/fstHeatMap.eps", plot = snpHeatmap, width = 36, height = 13, units = "cm", dpi = 300)
hetAll = read.table("../data/snps/hetAllSites")
colnames(hetAll) = c("sample", "All Sites")
hetAll$sample = str_pad(hetAll$sample, 3, pad = "0")
hetAll$sample = paste("SFK",hetAll$sample, sep ="")
hetSnps = read.table("../data/snps/hetSnps")
colnames(hetSnps) = c("sample", "Variant Sites")
hetSnps$sample = str_pad(hetSnps$sample, 3, pad = "0")
hetSnps$sample = paste("SFK",hetSnps$sample, sep ="")
popData = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% select("sample" = tubeID, "Region" = region, "depth" = depthZone) # Reads in population
# popData$popDepth = paste(popData$pop, popData$depth, sep = " ")
het = left_join(popData, hetAll, by = "sample") %>% left_join(hetSnps, by = "sample")
het$Region = factor(het$Region, levels = levels(het$Region)[c(2,3,1,4)])
het$depth = factor(het$depth, levels = levels(het$depth)[c(2,1)])
hetStats = het %>% group_by(Region, depth) %>% dplyr::summarise(N = n(), meanAll = mean(`All Sites`), sdAll = sd(`All Sites`), seAll = sd(`All Sites`)/sqrt(N), meanSnps = mean(`Variant Sites`), sdSnps = sd(`Variant Sites`), seSnps = sd(`Variant Sites`)/sqrt(N))
## `summarise()` regrouping output by 'Region' (override with `.groups` argument)
max(hetStats$meanAll, na.rm = TRUE)
## [1] 0.004352
min(hetStats$meanAll, na.rm = TRUE)
## [1] 0.00406
max(hetStats$meanSnps, na.rm = TRUE)
## [1] 0.23806
min(hetStats$meanSnps, na.rm = TRUE)
## [1] 0.1999967
hetMelt = melt(het, id.vars = c("sample", "Region", "depth"), variable.name = "type", value.name = "heterozygosity")
hetTheme = theme(axis.title.x = element_blank(),
axis.text.x = element_text(color = "black", size = 12),
axis.ticks.x = element_line(color = "black"),
axis.title.y = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 10),
axis.ticks.y = element_line(color = "black"),
legend.position = "right",
panel.border = element_rect(color = "black"),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
hetPlotAll = ggplot(data = subset(hetMelt, subset = hetMelt$type == "All Sites"),aes(x = depth, y = heterozygosity, fill = Region)) +
geom_jitter(aes(fill = Region),shape = 21, position = position_jitterdodge()) +
geom_boxplot(outlier.colour = NA, alpha = 0.5) +
xlab("Depth") +
ylab("Heterozygosity") +
ggtitle("All Sites") +
scale_fill_manual(values = rev(flPal)) + # scale_color_manual(values = flPal) +
theme_bw() +
hetTheme
hetPlotSnps = ggplot(data = subset(hetMelt, subset = hetMelt$type == "Variant Sites"),aes(x = depth, y = heterozygosity, fill = Region)) +
geom_jitter(aes(fill = Region),shape = 21, position = position_jitterdodge()) +
geom_boxplot(outlier.colour = NA, alpha = 0.5) +
xlab("Depth") +
ylab("Heterozygosity") +
ggtitle("Variant Sites") +
scale_fill_manual(values = rev(flPal)) +
scale_y_continuous(breaks=seq(0.1, 0.5, by = .05)) +
# scale_color_manual(values = flPal) +
theme_bw() +
hetTheme
hetPlots = (hetPlotAll | hetPlotSnps) +
plot_annotation(tag_levels = 'A') +
plot_layout(guides = "collect")&
theme(plot.tag = element_text(size = 16),
legend.position = "bottom")
# hetPlots
ggsave("../figures/heterozygosityPlot.pdf", plot = hetPlots, width = 15, height = 9, units = "cm", dpi = 300)
ggsave("../figures/heterozygosityPlot.png", plot = hetPlots, width = 15, height = 9, units = "cm", dpi = 300)
First we can create a function to calculate geographic distances between lat/lons:
ReplaceLowerOrUpperTriangle = function(m, triangle.to.replace) {
if (nrow(m) != ncol(m))
stop("Supplied matrix must be square.")
if (tolower(triangle.to.replace) == "lower")
tri = lower.tri(m)
else if (tolower(triangle.to.replace) == "upper")
tri = upper.tri(m)
else
stop("triangle.to.replace must be set to 'lower' or 'upper'.")
m[tri] = t(m)[tri]
return(m)
}
# If triangle.to.replace="lower", replaces the lower triangle of a square matrix with its upper triangle.
# If triangle.to.replace="upper", replaces the upper triangle of a square matrix with its lower triangle.
GeoDistanceInMetresMatrix = function(df.geopoints) {
# Returns a matrix (M) of distances between geographic points. M[i,j] = M[j,i] = Distance between (df.geopoints$lat[i], df.geopoints$lon[i]) and (df.geopoints$lat[j], df.geopoints$lon[j]). The row and column names are given by df.geopoints$name.
GeoDistanceInMetres = function(g1, g2) {
# Returns a vector of distances. (But if g1$index > g2$index, returns zero.) The 1st value in the returned vector is the distance between g1[[1]] and g2[[1]]. The 2nd value in the returned vector is the distance between g1[[2]] and g2[[2]]. Etc. Each g1[[x]] or g2[[x]] must be a list with named elements "index", "lat" and "lon". E.g. g1 = list(list("index"=1, "lat"=12.1, "lon"=10.1), list("index"=3, "lat"=12.1, "lon"=13.2))
DistM = function(g1, g2) {
require("Imap")
return(ifelse(
g1$index > g2$index,
0,
gdist(lat.1 = g1$lat, lon.1 = g1$lon, lat.2 = g2$lat, lon.2 = g2$lon, units = "m")))
}
return(mapply(DistM, g1, g2))
}
n.geopoints = nrow(df.geopoints)
# The index column is used to ensure we only do calculations for the upper triangle of points
df.geopoints$index = 1:n.geopoints
# Create a list of lists
list.geopoints = by(df.geopoints[, c("index", "lat", "lon")], 1:n.geopoints, function(x) {
return(list(x))
})
# Get a matrix of distances (in metres)
mat.distances = ReplaceLowerOrUpperTriangle(outer(list.geopoints, list.geopoints, GeoDistanceInMetres), "lower")
# Set the row and column names
rownames(mat.distances) = df.geopoints$name
colnames(mat.distances) = df.geopoints$name
return(mat.distances)}
Using IBS and geographic distance matrices
sintIBS = read.table("../data/snps/sintNoClones.ibsMat")%>% as.matrix() %>% as.dist(diag = FALSE)
coords = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% select(name = Sample, lat = latDD, lon = longDD)
sintGeo = as.dist(GeoDistanceInMetresMatrix(coords)/1000, diag = FALSE)
set.seed(694)
fkSintMantel = mantel.randtest(sintIBS, sintGeo, nrepet = 9999)
fkSintMantel
## Monte-Carlo test
## Call: mantel.randtest(m1 = sintIBS, m2 = sintGeo, nrepet = 9999)
##
## Observation: 0.00531085
##
## Based on 9999 replicates
## Simulated p-value: 0.2605
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 0.54105536726 -0.00002912277 0.00009740799
Using Nei’s genetic distance and geographic distance matrices
sintVcf = read.vcfR("../data/snps/sintNoClones.bcf", verbose = FALSE)
sintGenlight = vcfR2genlight(sintVcf, n.cores = 2)
popdata = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% select(sample = Sample, region, depthZone) %>% unite("population", region, depthZone, sep = "_", remove = TRUE)
strata(sintGenlight) = data.frame(popdata) #cubaGenlight was generated in Odination Analyses above
setPop(sintGenlight)= ~population
sintNeiDist = as.dist(stamppNeisD(sintGenlight, pop = TRUE), diag = F)
popCoords = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% group_by(region, depthZone) %>% summarize(lat = mean(latDD), lon = mean(longDD)) %>% unite("name", region, depthZone, sep = "_", remove = TRUE) %>% as.data.frame()
## `summarise()` regrouping output by 'region' (override with `.groups` argument)
popCoords$name = factor(popCoords$name)
popCoords$name = factor(popCoords$name, levels = levels(popCoords$name)[c(5, 6, 3, 4, 1, 2, 8, 7)])
popCoords = popCoords %>% arrange(levels(popCoords$name))
# row.names(popCoords) = popCoords$population
sintPopsGeo = as.dist(GeoDistanceInMetresMatrix(popCoords)/1000, diag = F)
set.seed(694)
sintPopIBD = mantel.randtest(sintPopsGeo, sintNeiDist, nrepet = 9999)
sintPopIBD
## Monte-Carlo test
## Call: mantel.randtest(m1 = sintPopsGeo, m2 = sintNeiDist, nrepet = 9999)
##
## Observation: -0.1805525
##
## Based on 9999 replicates
## Simulated p-value: 0.8034
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.936153870 -0.001178363 0.036713437
Using IBS and depth distance matrices
sintIBS = read.table("../data/snps/sintNoClones.ibsMat")%>% as.matrix() %>% as.dist(diag = FALSE)
sintDepths = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66,68,164,166,209,211),] %>% select(name = Sample, depthM) %>% dist(method = "euclidean")
set.seed(694)
fkSintMantel = mantel.randtest(sintIBS, sintDepths, nrepet = 9999)
fkSintMantel
## Monte-Carlo test
## Call: mantel.randtest(m1 = sintIBS, m2 = sintDepths, nrepet = 9999)
##
## Observation: 0.1404622
##
## Based on 9999 replicates
## Simulated p-value: 0.0001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 9.6064373522 -0.0002114321 0.0002144374
mantelObs = round(fkSintMantel$obs, 3)
mantelP = fkSintMantel$pvalue
sintIBSdist = melt(as.matrix(sintIBS), varnames = c("row", "col"), value.name = "ibs")
sintIBSdist = sintIBSdist[sintIBSdist$row != sintIBSdist$col,]
sintDepth = melt(as.matrix(sintDepths), varnames = c("row", "col"), value.name = "depth")
sintDepth = sintDepth[sintDepth$row != sintDepth$col,]
snpMantelDF = data.frame(cbind(sintIBSdist$ibs, sintDepth$depth))
colnames(snpMantelDF) = c("ibs", "depth")
sintIBSdepthMantelA = ggplot(data = snpMantelDF, aes(x = depth, y = ibs)) +
scale_fill_gradientn(colors = paletteer_d("wesanderson::Zissou1")) +
geom_point(shape = 21, fill = "gray40", alpha = 0.25, size = 0.5) +
stat_density_2d(aes(fill = stat(density)), n = 300, contour = FALSE, geom = "raster", alpha = 0.75) +
geom_smooth(method = lm, col = "black", fill = "gray40", fullrange = TRUE, size = 0.5) +
scale_x_continuous(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0)) +
annotate(geom = "text", x = 35, y = 0.16, label = bquote(~italic(r)~" = "~.(mantelObs)*", "~italic(p)~" = "~.(mantelP)), size = 3) +
labs(x = expression(paste(Delta," depth (m)")), y = "Genetic distance (1 - IBS)") +
theme_bw()
sintIBSdepthMantel = sintIBSdepthMantelA + theme(
axis.title.x = element_text(size = 12, color = "black"),
axis.text.x = element_text(size = 12, color = "black"),
axis.ticks.x = element_line(color = "black"),
axis.line.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black"),
axis.ticks.y = element_line(color = "black"),
axis.line.y = element_blank(),
panel.border = element_rect(size = 1.2, color = "black"),
plot.margin = margin(0.2,0.5,0.1,0.1, unit = "cm"),
legend.position = "none")
# sintIBSdepthMantel
ggsave("../figures/mantelPlot.tiff", plot = sintIBSdepthMantel, width = 12, height = 7, units = "cm", dpi = 300)
## `geom_smooth()` using formula 'y ~ x'
ggsave("../figures/mantelPlot.pdf", plot = sintIBSdepthMantel, width = 12, height = 7, units = "cm", dpi = 300)
## `geom_smooth()` using formula 'y ~ x'
ggsave("../figures/mantelPlot.png", plot = sintIBSdepthMantel, width = 12, height = 7, units = "cm", dpi = 300)
## `geom_smooth()` using formula 'y ~ x'
Population structure plot from NGSadmix/CLUMPAK results. Based on most probable values for K (2,4).
# colPal = c("#f98400", "#fa9626", "#fcbc75", "#fddfbd")
# colPal = c("#5B1A18", "#9D4A28", "#D67236", "#F1BB7B")
# colPal = c("#5B1A18", "#b75c2f", "#F1BB7B", "#d18650")
# colPal = c("#8C4A3B", "#BF5E49", "#F2DCDC", "#BF8173")
# colPal = c("#b75c2f", "#d18650", "#f8dbb9", "#F1BB7B")
# colPal = c("#F1BB7B", "#f8dbb9", "#b75c2f", "#d18650")
kColPal = c("#BF5E49", "#BF8173", "#F1BB7B", "#D18650")
admixpops = read.csv("../data/stephanocoeniaMetaData.csv")[-c(66, 68, 164, 166, 209, 211),] %>% select("sample" = tubeID, "pop" = region, "depth" = depthZone)
admixpops$popdepth = as.factor(paste(admixpops$pop, admixpops$depth, sep = " "))
clumpp2 = read.table("../data/snps/k/ClumppK2.output", header = FALSE)
clumpp4 = read.table("../data/snps/k/ClumppK4.output", header = FALSE)
clumpp2$V1 = admixpops$sample
clumpp4$V1 = admixpops$sample
fkSintAdmix = admixpops %>% left_join(clumpp2[,c(1,6:7)], by = c("sample" = "V1")) %>%
left_join(clumpp4[,c(1, 6:9)], by = c("sample" = "V1")) %>%
rename("cluster2.1" = "V6.x", "cluster2.2" = "V7.x", "cluster4.1" = "V6.y", "cluster4.2" = "V7.y", "cluster4.3" = "V8", "cluster4.4" = "V9")
fkSintAdmix$pop = factor(fkSintAdmix$pop, levels(fkSintAdmix$pop)[c( 2, 3, 1, 4)])
fkSintAdmix$depth = factor(fkSintAdmix$depth, levels(fkSintAdmix$depth)[c(2, 1)])
fkSintAdmix$popdepth = factor(fkSintAdmix$popdepth, levels(fkSintAdmix$popdepth)[c(4, 3, 6, 5, 2, 1, 8, 7)])
# fkSintAdmix = arrange(fkSintAdmix, pop, depth, -cluster4.1, cluster4.2, cluster4.3)
fkSintAdmix = arrange(fkSintAdmix, pop, depth, -cluster4.1, -cluster4.2, cluster4.3)
popCounts = fkSintAdmix %>% group_by(popdepth) %>% summarize(n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
popCounts
## # A tibble: 8 x 2
## popdepth n
## <fct> <int>
## 1 Riley's Hump Shallow 30
## 2 Riley's Hump Mesophotic 15
## 3 Tortugas Bank Shallow 30
## 4 Tortugas Bank Mesophotic 25
## 5 Lower Keys Shallow 30
## 6 Lower Keys Mesophotic 30
## 7 Upper Keys Shallow 30
## 8 Upper Keys Mesophotic 30
popCountList = reshape2::melt(lapply(popCounts$n,function(x){c(1:x)}))
fkSintAdmix$ord = popCountList$value
fkSintAdmixMelt = melt(fkSintAdmix, id.vars=c("sample", "pop", "depth", "popdepth", "ord"), variable.name="Ancestry", value.name="Fraction")
fkSintAdmixMelt$Ancestry = factor(fkSintAdmixMelt$Ancestry, levels = rev(levels(fkSintAdmixMelt$Ancestry)))
popAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5),
y1 = -0.17, y2 = -0.17, sample = NA, Ancestry = NA, depth = "Mesophotic",
ord = NA, Fraction = NA,
pop = c("Riley's Hump", "Tortugas Bank",
"Lower Keys", "Upper Keys"))
popAnno$pop = factor(popAnno$pop, levels = levels(popAnno$pop)[c(4, 1, 3, 2)])
admixTheme = theme_bw()+
theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
panel.background = element_rect(fill = "black", colour = "grey25"),
panel.border = element_rect(fill = NA, color = "black", size = 1, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 10),
strip.text.y.left = element_text(angle = 90),
strip.text.x.bottom = element_text(vjust = -.1, color = "grey90"),
legend.key = element_blank(),
legend.position = "none",
legend.title = element_text(size = 18))
admixPlotK2A = ggplot(data = subset(fkSintAdmixMelt, subset = fkSintAdmixMelt$Ancestry %in% c("cluster2.1", "cluster2.2")), aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = popAnno, aes(x = x1, xend = x2, y = y1, yend = y2, color = pop), size = 8) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(depth ~ pop, scales = "free", switch = "both", space = "free") +
scale_x_discrete(expand = c(0.01, 0.01)) +
scale_y_continuous(expand = c(0.005, 0.005)) +
scale_fill_manual(values = kColPal[c(1,3)]) +
scale_color_manual(values = flPal) +
ggtitle(expression(paste(italic("K")," = 2", sep =""))) +
# coord_cartesian(expand = TRUE, clip = "off")
coord_cartesian(ylim = c(-0.01, 1.01), clip = "off")
admixPlotK2 = admixPlotK2A + admixTheme
admixPlotK4A = ggplot(data = subset(fkSintAdmixMelt, subset = !(fkSintAdmixMelt$Ancestry %in% c("cluster2.1", "cluster2.2"))), aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = popAnno, aes(x = x1, xend = x2, y = y1, yend = y2, color = pop), size = 8) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(depth ~ pop, scales = "free", switch = "both", space = "free") +
scale_x_discrete(expand = c(0.01, 0.01)) +
scale_y_continuous(expand = c(0.005, 0.005)) +
scale_fill_manual(values = kColPal) +
scale_color_manual(values = flPal) +
ggtitle(expression(paste(italic("K")," = 4", sep =""))) +
# coord_cartesian(expand = TRUE, clip = "off")
coord_cartesian(ylim = c(-.01, 1.01), clip = "off")
admixPlotK4 = admixPlotK4A + admixTheme +
theme(strip.text.y.left = element_blank())
admixPlot = admixPlotK2 | admixPlotK4
# admixPlot
ggsave("../figures/admixturePlot.png", plot = admixPlot, width = 22, height = 7, units = "cm", dpi = 300)
ggsave("../figures/admixturePlot.eps", plot = admixPlot, width = 22, height = 7, units = "cm", dpi = 300)
Now let’s examine algal symbiont communities with the results of SymPortal analysis of Symbiodiniaceae ITS2 sequences.
Read in SymPortal outputs for ITS2 sequences and ITS2 type profiles
stephanocoeniaMetaData = read.csv("../data/stephanocoeniaMetaData.csv", header = TRUE, check.names = FALSE)[-c(66, 68, 164, 166, 209, 211),] %>% select(c(Sample = tubeID, region, depthM, depthZone))
its2Seqs = read.delim("../data/ITS2/148_20210301_DBV_20210401T112728.seqs.absolute.abund_CLEAN.txt", header = TRUE, check.names = FALSE)
its2Profs = read.csv("../data/ITS2/148_20210301_DBV_20210401T112728.profiles.absolute.abund_CLEAN.csv", header = TRUE, check.names = FALSE)
its2Seqs = stephanocoeniaMetaData %>% right_join(its2Seqs) %>% arrange(Sample)
## Joining, by = "Sample"
its2Profs = stephanocoeniaMetaData %>% right_join(its2Profs) %>% arrange(Sample)
## Joining, by = "Sample"
its2Seqs$region = factor(its2Seqs$region, levels(its2Seqs$region)[c( 2, 3, 1, 4)])
its2Seqs$depthZone = factor(its2Seqs$depthZone, levels(its2Seqs$depthZone)[c(2, 1)])
head(its2Profs)
its2Profs$region = factor(its2Profs$region, levels(its2Profs$region)[c( 2, 3, 1, 4)])
its2Profs$depthZone = factor(its2Profs$depthZone, levels(its2Profs$depthZone)[c(2, 1)])
its2Profs = its2Profs %>% arrange(region, depthZone, desc(`C3/C3.10`), desc(`C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk`), desc(`C3-C1-C3.10`), desc(`C3-C1dk-C15hx`), desc(`C3-C3go-C6c-C3gq-C3gp-C3gn-C3dw`), desc(`C16/C3-C16b`), desc(`C3-C3hb-C3ge-C3hc-C1dk`), desc(`C3-C3gr-C3gt-C3gs-C3.10`), desc(`C3/C1`),desc(`A3-A3b-A3at-A3ax`), desc(`A3-A3at-A3b-A3q-A3s`), desc(`A3-A3s-A3q`), desc(`A3`), desc(`A3-A3b-A3av-A3au-A3aw`),desc(`A4`), desc(`C3`), desc(`B18b`), desc(`B18c`), desc(`B5`))
sampleCounts = plyr::count(its2Profs, c('region','depthZone'))
meltedList = reshape2::melt(lapply(sampleCounts$freq,function(x){c(1:x)}))
its2Profs$barPlotOrder = meltedList$value
its2Profs=its2Profs[c(1,ncol(its2Profs),2:(ncol(its2Profs)-1))]
its2Seqs = its2Seqs %>% left_join(its2Profs[,c(1:2)]) %>% arrange(region, depthZone, barPlotOrder)
## Joining, by = "Sample"
its2Seqs = its2Seqs[,c(1, ncol(its2Seqs), 2:(ncol(its2Seqs)-1))]
head(its2Seqs)
## Sample barPlotOrder region depthM depthZone Symbiodinium A3 A3b A3at A3ax
## 1 SFK068 1 Riley's Hump 27.4 Shallow 0 15 0 0 0
## 2 SFK091 2 Riley's Hump 26.2 Shallow 0 34 0 0 0
## 3 SFK073 3 Riley's Hump 26.2 Shallow 0 45 0 0 0
## 4 SFK084 4 Riley's Hump 26.2 Shallow 0 11 0 0 0
## 5 SFK083 5 Riley's Hump 26.2 Shallow 0 16 0 0 0
## 6 SFK082 6 Riley's Hump 26.5 Shallow 0 16 0 0 0
## 43947_A 34778_A 495083_A 36534_A 34149_A 50854_A A3av A3s A3q 33981_A 1402229_A A3au
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## A3aw 50835_A 34175_A 33953_A A3r 1402205_A 364481_A 363583_A A4 1402230_A 50833_A
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 4 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## 50842_A 363143_A 72388_A 363606_A 797686_A 22386_A 529468_A 34696_A 363636_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402231_A 34151_A 364459_A 363570_A 363645_A 363578_A 1402232_A 364267_A 363625_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 50845_A 363142_A 1402267_A 22415_A 363639_A 905679_A 363598_A 363706_A 363685_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402206_A 1402233_A 1402235_A 43753_A 500385_A 1402234_A A3d 1402202_A 1402236_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 364620_A 363593_A 367833_A 22400_A 50850_A 22463_A 363687_A 693524_A 37988_A 66961_A
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 22436_A 363590_A 22426_A 45527_A A6b 36953_A 363563_A 37985_A 693526_A 22451_A
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 33927_A 1402203_A A4.3 35200_A 22392_A 65140_A 1402245_A 364567_A 364639_A 1402216_A
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 6 0 0 0 0 0 0 0
## 22464_A 22429_A 49905_A 49571_A 29211_A 36825_A 364218_A 1402237_A 363617_A 73521_A
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 364172_A 363654_A 1402274_A 49906_A 37990_A 50843_A 363674_A 363646_A 33878_A
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402268_A 1402282_A 22444_A 373280_A 1402238_A 366219_A 69439_A Breviolum B5 B18c
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 12 0 15
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## B18b 1402208_B 1402209_B 45548_B 1402210_B 43411_B 37534_B 1402211_B 1402212_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 11 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402213_B 1160454_B 1402214_B 1402215_B 37591_B B5ai 1402254_B 71511_B 1402256_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 7
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 71527_B 1402255_B 1402257_B 71517_B 71509_B 71508_B 1402258_B 71518_B 71510_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402259_B 368876_B 1402260_B 50427_B 1402262_B 71525_B 71523_B 71515_B 368004_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 5 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402261_B 71526_B 71524_B 1402266_B 1402265_B 1402264_B 1402263_B 43545_B 1402252_B
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 900132_B 1390171_B 1402253_B B1 1402276_B 1402280_B 38112_B Cladocopium C3 C1 C16
## 1 0 0 0 0 0 0 0 1452 775 9805 0
## 2 0 0 0 0 0 0 0 1291 761 8099 0
## 3 0 0 0 0 0 0 0 1178 978 7422 0
## 4 0 0 0 0 0 0 0 1153 831 7790 0
## 5 0 0 0 0 0 0 0 1049 682 7146 0
## 6 0 0 0 0 0 0 0 881 1161 5995 0
## C3go C3.10 C42.2 C1dl C3gm C3gl C3hb C3gr C16b 110271_C 334025_C C3gk C1dk 22330_C
## 1 0 0 1157 719 607 882 0 0 0 0 0 399 0 0
## 2 0 0 747 664 416 415 0 0 0 0 0 327 0 0
## 3 0 0 782 710 500 546 0 0 0 0 0 298 0 0
## 4 0 0 755 691 392 391 0 0 0 0 0 366 0 0
## 5 0 0 1052 729 507 639 0 0 0 0 0 369 0 0
## 6 0 0 598 545 324 391 0 0 0 0 0 233 0 0
## 11408_C 18596_C 21897_C C6c C3gq C3gp 65808_C C3gn C15hx C3dw C1cy 1402187_C 20795_C
## 1 468 301 0 0 0 0 0 0 0 0 237 0 237
## 2 272 228 0 0 0 0 0 0 0 0 219 0 195
## 3 301 246 0 0 0 0 0 0 0 0 211 0 160
## 4 189 263 0 0 0 0 0 0 0 0 173 0 177
## 5 277 229 0 0 0 0 0 0 0 0 207 0 175
## 6 209 185 0 0 0 0 0 0 0 0 159 0 171
## C93.1 65703_C 385070_C C3ge 1402188_C 1372_C 3238_C 95094_C C3hc 24879_C 91373_C
## 1 0 0 210 0 0 0 0 0 0 0 0
## 2 0 0 120 0 0 0 0 152 0 0 0
## 3 0 0 142 0 0 0 0 145 0 0 0
## 4 0 0 130 0 0 0 0 133 0 0 0
## 5 0 0 149 0 0 0 0 76 0 0 0
## 6 0 0 114 0 0 0 0 81 0 0 0
## 3699_C C3gt C3dz 20934_C C1af C3gs 25557_C 40208_C 470358_C 40209_C 1402193_C 2239_C
## 1 0 0 0 0 137 0 0 0 0 0 0 0
## 2 0 0 0 0 95 0 0 0 0 0 0 0
## 3 0 0 0 0 86 0 0 0 0 0 0 0
## 4 0 0 0 0 83 0 0 0 0 0 0 0
## 5 0 0 0 0 114 0 0 0 0 0 0 0
## 6 0 0 0 0 63 0 0 0 0 0 0 0
## 1402195_C C16a 17495_C 17534_C 2097_C 40211_C 93722_C C1v 40207_C 40212_C 1402196_C
## 1 0 0 0 88 0 0 0 0 0 0 0
## 2 0 0 90 57 0 0 110 61 0 0 0
## 3 0 0 0 67 0 0 0 72 0 0 0
## 4 0 0 62 74 0 0 125 64 0 0 0
## 5 0 0 63 80 0 0 0 0 0 0 0
## 6 0 0 60 61 0 0 0 0 0 0 0
## 1402197_C 9944_C 1402198_C 1402219_C 470998_C 54162_C 22574_C 20921_C 33343_C
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 67
## 3 0 0 0 0 0 0 0 0 57
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## 1402200_C 25492_C 1402218_C 3240_C 2037_C 85729_C 5371_C 1402225_C 909389_C 1402207_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 60 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## C3ag 2428_C 1402220_C 4062_C 103828_C 1402199_C 1398518_C 90670_C 1402204_C 1402227_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 1402248_C C6b 1402247_C 1402194_C 870_C 71029_C C3ga 91285_C 1402192_C 1402244_C C1bz
## 1 0 0 0 0 0 0 0 0 0 148 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## 18746_C 1402228_C 694_C C3i 1402250_C 1402243_C C21 1402281_C C3ca 9108_C 11201_C
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## 11191_C 7821_C 1402273_C C3ck 1402240_C 1402249_C 99988_C 1356_C 69324_C 24193_C C3bb
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## C40f 1401572_C 47282_C 16815_C 5726_C 1402270_C 1402221_C C1ap 1402275_C 21673_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 69758_C C1bt 1402269_C 2943_C C70 1402271_C 42218_C 1402277_C 9807_C C1ai C3t 54249_C
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## 26258_C 1402278_C 1402272_C C1x 40218_C 21804_C 1402279_C C3de 13929_C 23354_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 1402223_C 99010_C 983542_C 3366_C 1402226_C 1402222_C 42529_C 2152_C 62532_C 4558_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 2427_C 1829_C C3fo 18793_C 11809_C 31248_C 1402241_C 816_C 921460_C C3ao C3an C3cn
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## 3241_C 103581_C 21093_C 1402224_C 18813_C 1402201_C 22869_C 23865_C C6a C1j 1402239_C
## 1 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0
## 3434_C 22178_C 17016_C 1402242_C 42518_C 54160_C 873_C C50f 1402246_C 1390080_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 113247_C 99987_C 3601_C 1866_C 1402251_C 2895_C 9153_C C3fn 866_C 864_C 18159_C 990_C
## 1 0 0 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0 0 0
## 1402189_C 21205_C C3fc 871_C 1402190_C 8117_C 55844_C 54218_C 51874_C 874099_C
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## 27927_C C65b 46049_C 37410_C 28411_C D1 G3l G3b
## 1 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 4
## 5 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
head(its2Profs)
## Sample barPlotOrder region depthM depthZone A3-A3b-A3at-A3ax
## 1 SFK068 1 Riley's Hump 27.4 Shallow 0
## 2 SFK091 2 Riley's Hump 26.2 Shallow 0
## 3 SFK073 3 Riley's Hump 26.2 Shallow 0
## 4 SFK084 4 Riley's Hump 26.2 Shallow 0
## 5 SFK083 5 Riley's Hump 26.2 Shallow 0
## 6 SFK082 6 Riley's Hump 26.5 Shallow 0
## A3-A3at-A3b-A3q-A3s A3-A3s-A3q A3 A3-A3b-A3av-A3au-A3aw A4 B18b B18c B5 C3/C3.10
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk C3-C1-C3.10 C3-C1dk-C15hx
## 1 14344 0 0
## 2 11429 0 0
## 3 11236 0 0
## 4 11216 0 0
## 5 11124 0 0
## 6 9247 0 0
## C3-C3go-C6c-C3gq-C3gp-C3gn-C3dw C16/C3-C16b C3-C3hb-C3ge-C3hc-C1dk
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## C3-C3gr-C3gt-C3gs-C3.10 C3/C1 C3
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
its2SeqPerc = purgeOutliers(its2Seqs, count.columns = 6:length(its2Seqs), otu.cut = 0.0001, sampleZcut = -5)
## [1] "samples with counts below z-score -5 :"
## character(0)
## [1] "zscores:"
## numeric(0)
## [1] "OTUs passing frequency cutoff 0.0001 : 131"
its2SeqPerc$sum = apply(its2SeqPerc[, c(6:length(its2SeqPerc[1,]))], 1, function(x) {
sum(x, na.rm = T)
}
)
its2SeqPerc = cbind(its2SeqPerc[, c(1:5)], (its2SeqPerc[, c(6:(ncol(its2SeqPerc)-1))]
/ its2SeqPerc$sum))
apply(its2SeqPerc[, c(6:(ncol(its2SeqPerc)))], 1, function(x) {
sum(x, na.rm = T)
}
)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
gssSeq = otuStack(its2SeqPerc, count.columns = c(6:length(its2SeqPerc[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels() # remove summ rows
levels(gssSeq$otu)
## [1] "Symbiodinium" "A3" "A3b" "A3at" "A3ax"
## [6] "43947_A" "34778_A" "495083_A" "36534_A" "34149_A"
## [11] "50854_A" "A3av" "A3s" "A3q" "33981_A"
## [16] "1402229_A" "A3au" "A3aw" "50835_A" "34175_A"
## [21] "33953_A" "A3r" "1402205_A" "364481_A" "363583_A"
## [26] "A4" "1402230_A" "50833_A" "50842_A" "Breviolum"
## [31] "B5" "Cladocopium" "C3" "C1" "C16"
## [36] "C3go" "C3.10" "C42.2" "C1dl" "C3gm"
## [41] "C3gl" "C3hb" "C3gr" "C16b" "110271_C"
## [46] "334025_C" "C3gk" "C1dk" "22330_C" "11408_C"
## [51] "18596_C" "21897_C" "C6c" "C3gq" "C3gp"
## [56] "65808_C" "C3gn" "C15hx" "C3dw" "C1cy"
## [61] "1402187_C" "20795_C" "C93.1" "65703_C" "385070_C"
## [66] "C3ge" "1402188_C" "1372_C" "3238_C" "95094_C"
## [71] "C3hc" "24879_C" "91373_C" "3699_C" "C3gt"
## [76] "C3dz" "20934_C" "C1af" "C3gs" "25557_C"
## [81] "40208_C" "470358_C" "40209_C" "1402193_C" "2239_C"
## [86] "1402195_C" "C16a" "17495_C" "17534_C" "2097_C"
## [91] "40211_C" "93722_C" "C1v" "40207_C" "40212_C"
## [96] "1402196_C" "1402197_C" "9944_C" "1402198_C" "1402219_C"
## [101] "470998_C" "54162_C" "22574_C" "20921_C" "33343_C"
## [106] "1402200_C" "25492_C" "1402218_C" "3240_C" "2037_C"
## [111] "85729_C" "5371_C" "1402225_C" "909389_C" "1402207_C"
## [116] "C3ag" "2428_C" "1402220_C" "4062_C" "103828_C"
## [121] "1402199_C" "1398518_C" "90670_C" "1402204_C" "1402227_C"
## [126] "1402248_C" "C6b" "1402247_C" "870_C" "C3ga"
## [131] "1402244_C"
levels(gssSeq$depthZone)
## [1] "Shallow" "Mesophotic"
levels(gssSeq$region)
## [1] "Riley's Hump" "Tortugas Bank" "Lower Keys" "Upper Keys"
getPaletteA = colorRampPalette(c("#00A08A","#FFFFFF"))
getPaletteB = colorRampPalette(c("#EBCC2A","#FFFFFF"))
getPaletteC = colorRampPalette(c("#3B9AB2","#FFFFFF"))
seqPal = c(getPaletteA(31)[1:29], getPaletteB(3)[1:2], getPaletteC(102)[1:100])
its2SeqPlotA = ggplot(gssSeq, aes(x = barPlotOrder, y = count, fill = factor(otu))) +
geom_bar(position = "stack", stat = "identity", color = "black",
size = 0.25) +
ylab("Proportion") +
scale_fill_manual(values = seqPal)+
labs(fill = expression(paste(italic("ITS2"), " sequence"))) +
guides(fill = guide_legend(ncol = 9)) +
facet_grid(depthZone ~ region, scales = "free_x") + #faceting plots by Depth and Site
theme_bw()
its2SeqPlot = its2SeqPlotA +
theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
panel.background = element_rect(fill = "black", colour = "grey25"),
panel.border = element_rect(fill = NA, color = "black", size = 1, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(angle = 90),
strip.text.x.bottom = element_text(vjust = 0, color = "grey90"),
legend.key.size = unit(0.75, "line"),
legend.key = element_blank(),
legend.position = "bottom",
legend.title = element_text(angle = 90))
its2SeqPlot
its2ProfsPerc = its2Profs
its2ProfsPerc$sum = apply(its2ProfsPerc[, c(6:length(its2ProfsPerc[1,]))], 1, function(x) {
sum(x, na.rm = T)
})
its2ProfsPerc = cbind(its2ProfsPerc[, c(1:5)], (its2ProfsPerc[, c(6:(ncol(its2ProfsPerc)-1))]
/ its2ProfsPerc$sum))
head(its2ProfsPerc)
## Sample barPlotOrder region depthM depthZone A3-A3b-A3at-A3ax
## 1 SFK068 1 Riley's Hump 27.4 Shallow 0
## 2 SFK091 2 Riley's Hump 26.2 Shallow 0
## 3 SFK073 3 Riley's Hump 26.2 Shallow 0
## 4 SFK084 4 Riley's Hump 26.2 Shallow 0
## 5 SFK083 5 Riley's Hump 26.2 Shallow 0
## 6 SFK082 6 Riley's Hump 26.5 Shallow 0
## A3-A3at-A3b-A3q-A3s A3-A3s-A3q A3 A3-A3b-A3av-A3au-A3aw A4 B18b B18c B5 C3/C3.10
## 1 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0 0
## C1/C3-C42.2-C1dl-C3gl-C3gm-C3gk C3-C1-C3.10 C3-C1dk-C15hx
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 0 0
## 6 1 0 0
## C3-C3go-C6c-C3gq-C3gp-C3gn-C3dw C16/C3-C16b C3-C3hb-C3ge-C3hc-C1dk
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## C3-C3gr-C3gt-C3gs-C3.10 C3/C1 C3
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
# check that all proportions add up to 1
apply(its2ProfsPerc[, c(6:(ncol(its2ProfsPerc)))], 1, function(x) {
sum(x, na.rm = T)
})
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [42] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [83] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [124] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [165] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [206] 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Everything looks good and is ready to plot
gssProf = otuStack(its2ProfsPerc, count.columns = c(6:length(its2ProfsPerc[1, ])),
condition.columns = c(1:5)) %>% filter(otu != "summ") %>% droplevels() # remove summ rows
levels(gssProf$otu)
levels(gssProf$depthZone)
levels(gssProf$region)
# colorCount2 = length(c(4:length(its2ProfsPerc[1,]))) +1
# getPalette2 = colorRampPalette(paletteer_d("ggsci::cyan_material")[c(10:2)], bias = 1.4)
# profPal = c(paletteer_d("ggsci::deep-purple_material")[c(6:1)], paletteer_d("ggsci::amber_material")[c(6,4,2)], getPalette2(10))
# getPaletteA = colorRampPalette(c("#F95335","#FFFFFF"), bias = 1.4)
# getPaletteA = colorRampPalette(c("#00A08A","#FFFFFF"))
getPaletteA = colorRampPalette(c("#006994","#FFFFFF"))#08415C
# getPaletteA = colorRampPalette(c("#F21A00","#FFFFFF"))
# getPaletteB = colorRampPalette(c("#FCAF38","#FFFFFF"), bias = 1.4)
# getPaletteB = colorRampPalette(c("#EBCC2A","#FFFFFF"))
getPaletteB = colorRampPalette(c("#FFBF46","#FFFFFF"))
# getPaletteB = colorRampPalette(c("#EBCC2A","#FFFFFF"))
# getPaletteC = colorRampPalette(c("#50A3A4","#FFFFFF"), bias = 1.4)
# getPaletteC = colorRampPalette(c("#3B9AB2","#FFFFFF"))
getPaletteC = colorRampPalette(c("#085F63","#FFFFFF"))
# getPaletteC = colorRampPalette(c("#3B9AB2","#FFFFFF"))
profPal = c(getPaletteA(8)[1:6], getPaletteB(5)[1:3], getPaletteC(11)[1:10])
popAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5),
y1 = -0.12, y2 = -0.12,
region = c("Riley's Hump", "Tortugas Bank", "Lower Keys", "Upper Keys"))
gssProf = gssProf %>% left_join(popAnno, by = "region")
# popAnno$region = factor(popAnno$region, levels = levels(popAnno$region)[c(4, 1, 3, 2)])
# popAnno$otu = factor(popAnno$otu, levels = levels(gssProf$otu))
# popAnno$depthZone = factor(popAnno$depthZone, levels = levels(gssProf$depthZone))
its2ProfsPlotA = ggplot(gssProf, aes(x = barPlotOrder, y = count, fill = otu)) +
geom_bar(position = "stack", stat = "identity", color = "black", size = 0.25) +
scale_fill_manual(values = profPal) +
scale_color_manual(values = rev(flPal)) +
geom_segment(aes(x = x1, xend = x2, y = y1, yend = y2, color = region), size = 9) +
labs(fill = expression(paste(italic("ITS2"), " type profile"))) +
guides(color = "none", fill = guide_legend(ncol = 3, reverse = FALSE)) +
facet_grid(depthZone ~ region, scales = "free", switch = "both", space = "free") + # faceting plots by Depth and Site
scale_x_discrete(expand = c(0.01, 0.01)) +
scale_y_continuous(expand = c(0.005, 0.005)) +
coord_cartesian(ylim = c(-.01, 1.01), clip = "off") +
theme_bw()
its2ProfsPlot = its2ProfsPlotA +
theme(plot.title = element_text(hjust = 0.5),
panel.grid = element_blank(),
panel.background = element_rect(fill = "black", colour = "grey25"),
panel.border = element_rect(fill = NA, color = "black", size = 1, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 12),
strip.text.y.left = element_text(angle = 90),
strip.text.x.bottom = element_text(vjust = 0, color = "grey90"),
legend.key.size = unit(0.75, "line"),
legend.key = element_blank(),
legend.position = "bottom",
legend.title = element_text(angle = 90))
its2ProfsPlot
ggsave("../figures/its2ProfilePlot.png", plot = its2ProfsPlot, width = 17.5, height = 13, units = "cm", dpi = 300)
ggsave("../figures/its2ProfilePlot.eps", plot = its2ProfsPlot, width = 19, height = 13.5, units = "cm", dpi = 300)
Using vegan::betadisper() to look at multivariate homogeneity of dispersion (PERMDISP) between sites and depths. This is using Bray-Curtis dissimilarity.
set.seed(694) #setting seed allows repetition of randomized processes
its2dispS = betadisper(vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize")), its2Profs$region)
anova(its2dispS)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.0892 0.029731 0.7719 0.5109
## Residuals 215 8.2815 0.038518
No significant effect of Site on betadiversity.
set.seed(694)
its2dispD = betadisper(vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize")), its2Profs$depthZone)
anova(its2dispD)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.3104 0.310449 10.815 0.001175 **
## Residuals 217 6.2290 0.028705
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depth does significantly affect beta diversity.
Now let’s see how different communities are from each other with PERMANOVA. We will utilize the vegan::adonis() function. We will use Bray-Curtis similarity for our distance matrix and run a total 0f 9,999 permutations, and test the effects of Site, Depth, and the interaction between Site and Depth.
set.seed(694)
its2Adonis = adonis(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize") ~ region * depthZone,
data = its2Profs, permutations = 9999, method = "bray")
its2Adonis
##
## Call:
## adonis(formula = decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize") ~ region * depthZone, data = its2Profs, permutations = 9999, method = "bray")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 3 8.455 2.8183 8.4735 0.09229 0.0001 ***
## depthZone 1 5.368 5.3684 16.1410 0.05860 0.0001 ***
## region:depthZone 3 7.612 2.5374 7.6290 0.08309 0.0001 ***
## Residuals 211 70.178 0.3326 0.76602
## Total 218 91.613 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonisRegionF = round(its2Adonis$aov.tab[1,4], 3)
adonisDepthF = round(its2Adonis$aov.tab[2,4], 3)
adonisIntF = round(its2Adonis$aov.tab[3,4], 3)
adonisRegionP = its2Adonis$aov.tab[1,6]
adonisDepthP = its2Adonis$aov.tab[2,6]
adonisIntP = its2Adonis$aov.tab[3,6]
Since we found that Depth was a significant factor in our PERMANOVA we can now use pairwise PERMANOVA to reveal where differences occur across depth. This utilizes the package pairwiseAdonis, where we will again use Bray-Curtis similarity and 9,999 permutations. We also have added false discovery rate (FDR) corrections since we are perfoming multiple comparisons.
set.seed(694)
its2PWAdonis = pairwise.adonis2(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize") ~ region * depthZone , data = its2Profs, sim.method = "bray", p.adjust.m = "BH", perm = 9999)
its2PWAdonis
## $parent_call
## [1] "decostand(its2Profs[, c(6:ncol(its2Profs))], \"normalize\") ~ region * depthZone , strata = Null"
##
## $`Riley's Hump_vs_Tortugas Bank`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 3.005 3.00545 8.1667 0.07203 0.0001 ***
## depthZone 1 2.368 2.36766 6.4336 0.05674 0.0001 ***
## region:depthZone 1 1.024 1.02426 2.7832 0.02455 0.0140 *
## Residuals 96 35.329 0.36801 0.84668
## Total 99 41.727 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Riley's Hump_vs_Lower Keys`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 4.577 4.5770 13.8973 0.10287 0.0001 ***
## depthZone 1 4.651 4.6510 14.1220 0.10453 0.0001 ***
## region:depthZone 1 2.332 2.3316 7.0794 0.05240 0.0001 ***
## Residuals 100 32.935 0.3293 0.74020
## Total 103 44.494 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Riley's Hump_vs_Upper Keys`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 3.437 3.4374 10.1383 0.08247 0.0001 ***
## depthZone 1 2.461 2.4614 7.2598 0.05906 0.0001 ***
## region:depthZone 1 1.536 1.5364 4.5315 0.03686 0.0018 **
## Residuals 101 34.244 0.3390 0.82161
## Total 104 41.679 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Tortugas Bank_vs_Lower Keys`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 0.981 0.9811 3.0034 0.02138 0.0126 *
## depthZone 1 7.693 7.6933 23.5502 0.16762 0.0001 ***
## region:depthZone 1 1.290 1.2895 3.9475 0.02810 0.0025 **
## Residuals 110 35.934 0.3267 0.78291
## Total 113 45.898 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Tortugas Bank_vs_Upper Keys`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 1.694 1.69407 5.0490 0.03770 0.0007 ***
## depthZone 1 2.886 2.88595 8.6013 0.06422 0.0001 ***
## region:depthZone 1 3.112 3.11204 9.2751 0.06926 0.0001 ***
## Residuals 111 37.243 0.33553 0.82882
## Total 114 44.935 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Lower Keys_vs_Upper Keys`
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## region 1 3.424 3.4239 11.299 0.07154 0.0001 ***
## depthZone 1 4.199 4.1990 13.857 0.08773 0.0001 ***
## region:depthZone 1 5.390 5.3897 17.786 0.11261 0.0001 ***
## Residuals 115 34.849 0.3030 0.72812
## Total 118 47.861 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
We can use principal coordinates analysis (PCoA) to visualize our samples in multivariate space based on ITS2 type profiles. First we need to set up data for PCoA analysis in R.
We will create a distance matrix with Bray-Curtis similarity using the package vegan
its2Dist = vegdist(decostand(its2Profs[, c(6:ncol(its2Profs))], "normalize"), method = "bray")
This is the actual PCoA step
its2Mds = cmdscale(its2Dist, eig = TRUE, x.ret = TRUE)
Calculate the eigenvalues so we can figure out % variation shown on each Principal Coordinate Axis
its2Var = round(its2Mds$eig/sum(its2Mds$eig)*100, 1)
its2Var
## [1] 28.5 20.5 15.2 10.1 7.5 6.7 4.3 3.5 2.0 1.8 0.8 0.6 0.5 0.2 0.2 0.1
## [17] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [33] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [49] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [65] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [81] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [97] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [113] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [129] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [145] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [161] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [177] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## [193] 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.1 -0.1
## [209] -0.1 -0.1 -0.1 -0.1 -0.2 -0.2 -0.2 -0.3 -0.3 -0.3 -0.3
Formatting the data to plot
its2Values = its2Mds$points
its2Values =as.data.frame(its2Values, sample = rownames(its2Values))
its2PcoaDf = data.frame(sample = rownames(its2Values), site = factor(its2Profs$region, levels = levels(its2Profs$region)[4:1]),
depth = as.factor(its2Profs$depthZone),PCo1 = its2Values[,1],
PCo2 = its2Values[,2])
head(its2PcoaDf)
## sample site depth PCo1 PCo2
## 1 1 Riley's Hump Shallow -0.4018591 -0.4503204
## 2 2 Riley's Hump Shallow -0.4018591 -0.4503204
## 3 3 Riley's Hump Shallow -0.4018591 -0.4503204
## 4 4 Riley's Hump Shallow -0.4018591 -0.4503204
## 5 5 Riley's Hump Shallow -0.4018591 -0.4503204
## 6 6 Riley's Hump Shallow -0.4018591 -0.4503204
PCoA Biplot
set.seed(694)
its2PcoaA = ggplot(its2PcoaDf, aes(x = PCo1, y = PCo2, color = site, fill = site, shape = depth, linetype = depth)) +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
stat_ellipse(data = its2PcoaDf, type = "t", geom = "polygon", alpha = 0.1, fill = NA) +
geom_point(aes(shape = factor(depth), fill = site),
color = "black", size = 4, position = position_jitter(width = 0.01, height = 0.01)) +
scale_fill_manual(values = flPal, name = "Region") +
scale_color_manual(values = flPal, name = "Region") +
scale_shape_manual(values = c(24,25), name = "Depth Zone") +
scale_linetype_manual(values = c(1,2), name = "Depth Zone")+
# guides(fill = guide_legend(override.aes = list(shape = 22, size = 5.75, color = "white")), size = FALSE, color = FALSE, linetype = ) +
guides(shape = guide_legend(order = 2), linetype = guide_legend(override.aes = list(linetype = c(1,2), alpha = 1, color = "black", fill = NA), order = 3), fill = guide_legend(override.aes = list(shape = 22, size = 5, color = NA, alpha = NA), order = 1), color = FALSE)+
xlab(paste ("PCo 1 (", its2Var[1],"%)", sep = "")) +
ylab(paste ("PCo 2 (", its2Var[2],"%)", sep = "")) +
annotate(geom = "text", x = 0.11, y = -.78, label = bquote(~bold("PERMANOVA:"))) +
annotate(geom = "text", x = 0.48, y = -.88, size = 3, label =
bquote(~bold("Region:")~italic(PseudoF)~" = "~.(adonisRegionF)*","~italic(p)~" = "~.(adonisRegionP))) +
annotate(geom = "text", x = 0.48, y = -.98, size = 3, label =
bquote(~bold("Depth:")~italic(PseudoF)~" = "~.(adonisDepthF)*","~italic(p)~" = "~.(adonisDepthP))) +
annotate(geom = "text", x = 0.55, y = -1.08, size = 3, label =
bquote(~bold("Interaction:")~italic(PseudoF)~" = "~.(adonisIntF)*","~italic(p)~" = "~.(adonisIntP))) +
theme_bw()
its2Pcoa = its2PcoaA +
theme(axis.title.x = element_text(color = "black", size = 12),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x = element_blank(),
axis.title.y = element_text(color = "black", size = 12),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "right",
legend.title = element_text(color = "black", size = 12),
legend.text = element_text(color = "black", size = 12),
legend.key = element_blank(),
panel.border = element_rect(color = "black", size = 1.2),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
# its2Pcoa
ggsave("../figures/its2PCoA.png", plot = its2Pcoa, width = 16, height = 10, unit= "cm", dpi = 300)
ggsave("../figures/its2PCoA.eps", plot = its2Pcoa, width = 16, height = 10, unit = "cm", dpi = 300)
ggsave("../figures/its2PCoA.tiff", plot = its2Pcoa, width = 16, height = 10, unit= "cm", dpi = 300)
its2Dist = its2Profs[c(6:ncol(its2Profs))] %>% decostand("normalize") %>% vegdist(method = "bray")
sintIBS = read.table("../data/snps/sintNoClones.ibsMat")[-131,-131] %>% as.matrix() %>% as.dist(diag = FALSE)
set.seed(694)
fkSintMantel = mantel.randtest(sintIBS, its2Dist, nrepet = 9999)
fkSintMantel
## Monte-Carlo test
## Call: mantel.randtest(m1 = sintIBS, m2 = its2Dist, nrepet = 9999)
##
## Observation: 0.0105533
##
## Based on 9999 replicates
## Simulated p-value: 0.1379
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 1.05362461603 -0.00003868955 0.00010106088